#!/bin/bash 

# Print the one-line command synopsis on standard output.
usage() {
    local synopsis="Usage: rcMergeRepeats -f genome.fasta -s species -d output_directory -u RepeatMasker.out -q RepeatMasker.tbl -c repeatcraft.cfg -t ThreadNum -b dictionary -m yes|no | -h"
    printf '%s\n' "$synopsis"
}


# Parse the command-line options into the globals used by the functions below:
#   -f genome   -s spe      -d dir     -u rmOut   -q table
#   -t threads  -b dict     -m cutoff  -c rcConfig
#   -h prints the usage line and exits.
#
# -c is accepted because the usage text advertises it; without it in the
# optstring getopts would stop at the bare word after "-c" and silently ignore
# every option that follows. No code visible in this file reads rcConfig.
while getopts f:s:d:u:q:c:t:b:m:h option
do
    case "${option}" in
        f) genome=${OPTARG} ;;
        s) spe=${OPTARG} ;;
        d) dir=${OPTARG} ;;
        u) rmOut=${OPTARG} ;;
        q) table=${OPTARG} ;;
        c) rcConfig=${OPTARG} ;;
        t) threads=${OPTARG} ;;
        b) dict=${OPTARG} ;;
        m) cutoff=${OPTARG} ;;
        h) usage; exit ;;
        # getopts has already printed its own diagnostic for an unknown option
        # or a missing argument; stop instead of running with a partial setup.
        *) usage >&2; exit 1 ;;
    esac
done

##### Subprocess ltrfinder

# Run LTR_FINDER_parallel on the genome from inside ${dir}/${spe}LtrFinder/.
# Globals read: dir, spe, genome, threads, SCRIPT_DIR.
# Side effects: creates the working directory if needed and leaves the shell's
#   working directory at ${dir} afterwards.
# Returns: non-zero if the directory cannot be created or entered, otherwise
#   the exit status of LTR_FINDER_parallel.
ltrfinder()
{
    local workDir="${dir}/${spe}LtrFinder/"
    local status=0

    # -p: an already existing directory (e.g. on a re-run) is not an error.
    mkdir -p -- "$workDir" || return 1
    # The tool is given no output path and repeatcraft() later looks for its
    # GFF3 in this directory, so never launch it from anywhere else.
    cd -- "$workDir" || return 1
    "${SCRIPT_DIR}/LTR_FINDER_parallel" -seq "$genome" -threads "$threads" || status=$?
    cd -- "${dir}" || return 1
    return "$status"
}

##### Subprocess convert rmOut to GFF3 and then to GFF2

# Convert the RepeatMasker .out file to GFF3 and derive a GFF2 file from it.
# Globals read: rmOut, dir, spe.
# Writes: ${dir}/${spe}.out.gff3 and ${dir}/${spe}.out.gff2.
# Returns: non-zero if rmOutToGFF3.pl or the rewrite pipeline's last stage
#   fails, otherwise the status of the final in-place sed.
convert()
{
    local gff3="${dir}/${spe}.out.gff3"
    local gff2="${dir}/${spe}.out.gff2"

    rmOutToGFF3.pl "$rmOut" > "$gff3" || return 1

    # Stage 1 (sed): drop the first two lines and turn `Target=` into
    #                `Target "Motif:`.
    # Stage 2 (awk): append a closing double quote to the 10th
    #                whitespace-separated field; the assignment re-joins all
    #                fields with tabs.
    # Stage 3 (sed): re-join `Target<TAB>` with a space, prepend the two GFF2
    #                header lines before the first line, and turn the last two
    #                tabs of each line into spaces.
    sed '1,2d; s/Target=/Target "Motif:/g' "$gff3" \
        | awk '{OFS="\t"}{gsub(/$/,"\"", $10); print}' \
        | sed "s/Target\t/Target /g; 1s/^/##gff-version 2\n##date xxxx-xx-xx\n/; s/\(.*\)\t/\1 /; s/\(.*\)\t/\1 /" \
        > "$gff2" || return 1

    # Remove any remaining ##sequence-region lines from the GFF2 file.
    sed -i '/##sequence-region.*/d' "$gff2"
}


##### Subprocess repeatcraft

# Prepare a RepeatCraft configuration and run repeatcraft.py in strict mode.
# Globals read: dir, spe, genome, rmOut, SCRIPT_DIR.
# Writes: ${dir}/${spe}RepeatCraft/repeatcraft.cfg, a copy of the example
#   configuration whose `ltr_finder_gff:` entry points at
#   ${dir}/${spe}LtrFinder/<genome basename>.finder.combine.gff3.
# Returns: non-zero if the directory or configuration cannot be written,
#   otherwise the exit status of repeatcraft.py.
# NOTE: the path is spliced into a sed expression that uses `#` as delimiter,
#   so a `#`, `&` or backslash in ${dir}, ${spe} or the genome file name would
#   corrupt the substitution.
repeatcraft()
{
    local outDir="${dir}/${spe}RepeatCraft"
    local ltrGff
    ltrGff="${dir}/${spe}LtrFinder/$(basename -- "${genome}").finder.combine.gff3"

    # -p: tolerate an output directory left over from an earlier run.
    mkdir -p -- "${outDir}/" || return 1
    sed "s#ltr_finder_gff:.*#ltr_finder_gff: ${ltrGff} #g" "${SCRIPT_DIR}/repeatCraft/example/repeatcraft.cfg" > "${outDir}/repeatcraft.cfg" || return 1
    python "${SCRIPT_DIR}/repeatCraft/repeatcraft.py" -r "${dir}/${spe}.out.gff2" -u "$rmOut" -c "${outDir}/repeatcraft.cfg" -o "${dir}/${spe}" -m strict
}

##### Subprocess mergeRepeats

# Sort the RepeatCraft output, filter overlapping repeats and merge them.
# Globals read: dir, spe, genSize, cutoff, SCRIPT_DIR.
# Reads:  ${dir}/${spe}.rmerge.gff
# Writes: ${dir}/${spe}.rmerge.gff.sorted directly; the remaining paths
#   (.rmerge.gff.filtered, .mergedRepeats.bed, .mergedRepeats.revisedTable,
#   .filteredRepeats.bed, .filteredRepeats.summary) are handed to the R scripts.
# Returns: non-zero as soon as one step fails, otherwise the exit status of
#   mergeRepeats.R.
mergeRepeats()
{
    local prefix="${dir}/${spe}"

    # Stable sort on the first column only, keeping the input order within it.
    sort -s -k1,1 "${prefix}.rmerge.gff" > "${prefix}.rmerge.gff.sorted" || return 1
    # Earlier alternative kept for reference:
    #filter-gff overlap -v --progress -s 1 -d -t -c length -a length ${dir}/${spe}.rmerge.gff.sorted ${dir}/${spe}.rmerge.gff.filtered
    Rscript "${SCRIPT_DIR}/filteringOverlappingRepeats.R" "${prefix}.rmerge.gff.sorted" "${prefix}.rmerge.gff.filtered" || return 1
    # ${cutoff:+...}: when -m was not given the trailing argument is omitted
    # entirely (as before) instead of being passed as an empty string.
    Rscript "${SCRIPT_DIR}/mergeRepeats.R" "${prefix}.rmerge.gff.filtered" "${prefix}.mergedRepeats.bed" "$genSize" "${prefix}.mergedRepeats.revisedTable" "${prefix}.filteredRepeats.bed" "${prefix}.filteredRepeats.summary" ${cutoff:+"$cutoff"}
}


##### Subprocess scafDeedPoll
# Run the back-swap scripts over the filtered BED and GFF files using the
# dictionary given with -b, then build the final GFF.
# Globals read: dir, spe, dict, SCRIPT_DIR.
# Side effects:
#   - copies ${dir}/${spe}.filteredRepeats.bed to a .bak file first;
#   - replaces the BED and ${dir}/${spe}.rmerge.gff.filtered with the outputs
#     of backSwap.py / backSwapGFF.py (written to a temporary ".2" file);
#   - asks makeGff.R to write ${dir}/${spe}.filteredRepeats.gff.
# Returns: non-zero as soon as one step fails, otherwise the exit status of
#   makeGff.R.
scafDeedPoll()
{
    local bed="${dir}/${spe}.filteredRepeats.bed"
    local gff="${dir}/${spe}.rmerge.gff.filtered"

    cp -- "$bed" "${bed}.bak" || return 1

    python "${SCRIPT_DIR}/backSwap.py" "$bed" "${dict}" "${bed}.2" || return 1
    mv -- "${bed}.2" "$bed" || return 1

    python "${SCRIPT_DIR}/backSwapGFF.py" "$gff" "${dict}" "${gff}.2" || return 1
    mv -- "${gff}.2" "$gff" || return 1

    Rscript "${SCRIPT_DIR}/makeGff.R" "$bed" "$gff" "${dir}/${spe}.filteredRepeats.gff"
}

##### Subprocess Running Cow
# Print the ASCII-art banner followed by the current stage label.
# Reads the global `stage`; writes to standard output.
runningCow() {
	local banner="    
              )  (
	     (   ) )
	     ) ( (
	   _______)_
	.-'---------|  
       ( C|/\/\/\/\/|
	'-./\/\/\/\/|
	 '_________'
	  '-------'
	<<< $stage >>>"
	printf '%s\n' "$banner"
}


##### Subprocess Size
# Extract the genome length from the RepeatMasker summary table.
# Globals read: table.   Globals written: genSize.
# From the 4th line of ${table} the text after the last ':' is taken,
# everything from the first " bp" onwards is dropped, and all spaces are
# removed.
# Returns: non-zero if the table cannot be read or the result is not a plain
#   run of digits (genSize is still assigned in that case).
genomesize()
{
    genSize=$(awk 'NR == 4 { sub(/.*:/, ""); sub(/ bp.*/, ""); gsub(/ /, ""); print; exit }' "$table") || return 1

    if [[ ! $genSize =~ ^[0-9]+$ ]]; then
        echo "genomesize: no numeric genome length on line 4 of '${table}' (got '${genSize}')" >&2
        return 1
    fi
}

##### Main

# Directory holding the helper scripts invoked by the functions above.
# NOTE(review): this is a hard-coded absolute path into one user's home
# directory; confirm how it is meant to be set on other machines.
SCRIPT_DIR=/home/toby/projects/earlGreyREwrite/EarlGreyUpdate/scripts

# Stop the pipeline, naming the stage that was running. Every later step
# consumes files produced by the earlier ones, so continuing after a failure
# would only end in a misleading "Done!" banner.
abortStage()
{
    echo "ERROR: stage '${stage}' failed; aborting." >&2
    exit 1
}

# Step 1 - Run ltr_finder

stage="Running LTR Finder"
runningCow
ltrfinder || abortStage

# Step 2 - Generate all respective GFF2 files

stage="Generating GFF Files"
runningCow
convert || abortStage

# Step 3 - Run RepeatCraft

stage="Running RepeatCraft"
runningCow
repeatcraft || abortStage

# Step 4 - Resolve overlaps: read the genome size, merge the repeats, then
#          apply the back-swap step

stage="Resolving Overlapping Repeats"
runningCow
genomesize || abortStage
mergeRepeats || abortStage
scafDeedPoll || abortStage

# Step 5 - Done

stage="Done!"
runningCow

